
/**
 * OCR language list for Tesseract 4.00.
 *
 * Each entry is a `{ label, value }` pair: `label` is the human-readable
 * language name and `value` is the language code (presumably the Tesseract
 * traineddata identifier - confirm against the OCR caller).
 *
 * The table is written as compact `[label, value]` tuples, ordered by code,
 * and expanded into objects once at module load.
 */
const baseOcrLanguages = [
    ['Afrikaans', 'afr'],
    ['Amharic', 'amh'],
    ['Arabic', 'ara'],
    ['Assamese', 'asm'],
    ['Azerbaijani', 'aze'],
    ['Azerbaijani - Cyrillic', 'aze_cyrl'],
    ['Belarusian', 'bel'],
    ['Bengali', 'ben'],
    ['Tibetan', 'bod'],
    ['Bosnian', 'bos'],
    ['Bulgarian', 'bul'],
    ['Catalan; Valencian', 'cat'],
    ['Cebuano', 'ceb'],
    ['Czech', 'ces'],
    ['Chinese - Simplified', 'chi_sim'],
    ['Chinese - Traditional', 'chi_tra'],
    ['Cherokee', 'chr'],
    ['Welsh', 'cym'],
    ['Danish', 'dan'],
    ['German', 'deu'],
    ['Dzongkha', 'dzo'],
    ['Greek, Modern (1453-)', 'ell'],
    ['English', 'eng'],
    ['English, Middle (1100-1500)', 'enm'],
    ['Esperanto', 'epo'],
    ['Estonian', 'est'],
    ['Basque', 'eus'],
    ['Persian', 'fas'],
    ['Finnish', 'fin'],
    ['French', 'fra'],
    ['German Fraktur', 'frk'],
    ['French, Middle (ca. 1400-1600)', 'frm'],
    ['Irish', 'gle'],
    ['Galician', 'glg'],
    ['Greek, Ancient (-1453)', 'grc'],
    ['Gujarati', 'guj'],
    ['Haitian; Haitian Creole', 'hat'],
    ['Hebrew', 'heb'],
    ['Hindi', 'hin'],
    ['Croatian', 'hrv'],
    ['Hungarian', 'hun'],
    ['Inuktitut', 'iku'],
    ['Indonesian', 'ind'],
    ['Icelandic', 'isl'],
    ['Italian', 'ita'],
    ['Italian - Old', 'ita_old'],
    ['Javanese', 'jav'],
    ['Japanese', 'jpn'],
    ['Kannada', 'kan'],
    ['Georgian', 'kat'],
    ['Georgian - Old', 'kat_old'],
    ['Kazakh', 'kaz'],
    ['Central Khmer', 'khm'],
    ['Kirghiz; Kyrgyz', 'kir'],
    ['Korean', 'kor'],
    ['Kurdish', 'kur'],
    ['Lao', 'lao'],
    ['Latin', 'lat'],
    ['Latvian', 'lav'],
    ['Lithuanian', 'lit'],
    ['Malayalam', 'mal'],
    ['Marathi', 'mar'],
    ['Macedonian', 'mkd'],
    ['Maltese', 'mlt'],
    ['Malay', 'msa'],
    ['Burmese', 'mya'],
    ['Nepali', 'nep'],
    ['Dutch; Flemish', 'nld'],
    ['Norwegian', 'nor'],
    ['Oriya', 'ori'],
    ['Panjabi; Punjabi', 'pan'],
    ['Polish', 'pol'],
    ['Portuguese', 'por'],
    ['Pushto; Pashto', 'pus'],
    ['Romanian; Moldavian; Moldovan', 'ron'],
    ['Russian', 'rus'],
    ['Sanskrit', 'san'],
    ['Sinhala; Sinhalese', 'sin'],
    ['Slovak', 'slk'],
    ['Slovenian', 'slv'],
    ['Spanish; Castilian', 'spa'],
    ['Spanish; Castilian - Old', 'spa_old'],
    ['Albanian', 'sqi'],
    ['Serbian', 'srp'],
    ['Serbian - Latin', 'srp_latn'],
    ['Swahili', 'swa'],
    ['Swedish', 'swe'],
    ['Syriac', 'syr'],
    ['Tamil', 'tam'],
    ['Telugu', 'tel'],
    ['Tajik', 'tgk'],
    ['Tagalog', 'tgl'],
    ['Thai', 'tha'],
    ['Tigrinya', 'tir'],
    ['Turkish', 'tur'],
    ['Uighur; Uyghur', 'uig'],
    ['Ukrainian', 'ukr'],
    ['Urdu', 'urd'],
    ['Uzbek', 'uzb'],
    ['Uzbek - Cyrillic', 'uzb_cyrl'],
    ['Vietnamese', 'vie'],
    ['Yiddish', 'yid'],
].map(([label, value]) => ({ label, value }));

/**
 * Languages offered for OCR in the current build.
 *
 * The build-time flag `import.meta.env.BROWSER` selects the list:
 * - when it is `'edge'`, this is the base list itself (same array reference);
 * - otherwise it is a new array holding the base list followed by the extra
 *   "English (Offline)" entry (`eng-fast`).
 */
export const ocrLanguages = import.meta.env.BROWSER !== 'edge'
    ? [...baseOcrLanguages, { label: 'English (Offline)', value: 'eng-fast' }]
    : baseOcrLanguages;


/**
 * Returns the OCR language code to preselect for the current build.
 *
 * The flag is read on every call rather than cached at module load.
 *
 * @returns {string} `'eng'` when `import.meta.env.BROWSER` is `'edge'`
 *   (the build whose language list omits the offline English entry),
 *   otherwise `'eng-fast'`.
 */
export const getDefaultOcrLanguage = () => {
    const isEdgeBuild = import.meta.env.BROWSER === 'edge';
    if (isEdgeBuild) {
        return 'eng';
    }
    return 'eng-fast';
};